home *** CD-ROM | disk | FTP | other *** search
- /* lex.c -- C-tools lexical analyzer.
- Copyright (C) 1995 Sandro Sigala */
-
- /* $Id: lex.c,v 1.11 1995/08/08 12:29:23 sandro Exp $ */
-
- /* This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-
-
- #include <ctype.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
-
- #include "lex.h"
- #include "misc.h"
-
/* Stream the lexer reads characters from; only declared here, the
   definition lives outside this file.  */
extern FILE *input_file;

/* Line counter, starting at 1; gettoken bumps it as it reads newlines.  */
int lex_lineno = 1;

/* NUL-terminated text of the token being scanned.  Allocated by init_lex,
   grown by extend_token_buffer and released by done_lex.  */
char *lex_token_buffer;

/* When exactly 1, gettoken returns each white space character to the
   caller instead of skipping it.  */
int lex_return_white_spaces = 0;

/* When nonzero, gettoken returns a '#' line as a single DIRECTIVE token;
   otherwise only the '#' character itself is returned.  */
int lex_return_directives = 1;

/* Capacity of the pushed-back character stack.  */
#define MAX_LEXER_BUFFER 64

/* Current capacity limit of lex_token_buffer, in characters.  */
static int maxtoken;

/* Stack of pushed-back characters.  The elements are ints, not chars, so
   that every value getc can return (including EOF and bytes >= 0x80)
   comes back from lexgetc unchanged.  */
static int lexbuf[MAX_LEXER_BUFFER];

/* Number of characters currently held in lexbuf.  */
static int lexbufidx = 0;

/* Fetch the next input character: the most recently pushed-back one if
   there is any, otherwise a fresh character from input_file.  */
#define lexgetc() \
  (lexbufidx ? lexbuf[--lexbufidx] : getc (input_file))

/* Push C back so that the next lexgetc returns it.  No overflow check is
   made; callers must not push more than MAX_LEXER_BUFFER characters.  */
#define lexungetc(c) \
  (lexbuf[lexbufidx++] = (c))

void init_lex (void);
void done_lex (void);
int gettoken (void);
void ungettoken (void);

static char *extend_token_buffer (char *);
static int is_reserved_word (char *);
-
- void
- init_lex (void)
- {
- maxtoken = 40;
- lex_token_buffer = (char *) xmalloc (maxtoken + 1);
- }
-
- void
- done_lex (void)
- {
- free (lex_token_buffer);
- }
-
- static char *
- extend_token_buffer (char *p)
- {
- int offset = p - lex_token_buffer;
-
- maxtoken = maxtoken * 2 + 10;
- lex_token_buffer = (char *) xrealloc (lex_token_buffer, maxtoken + 2);
-
- return lex_token_buffer + offset;
- }
-
- static int
- is_reserved_word (char *word)
- {
- static struct { char *w; int t; } wordlist[] =
- {
- {"auto", KW_AUTO}, {"break", KW_BREAK},
- {"case", KW_CASE}, {"char", KW_CHAR},
- {"const", KW_CONST}, {"continue", KW_CONTINUE},
- {"default", KW_DEFAULT}, {"do", KW_DO},
- {"double", KW_DOUBLE}, {"else", KW_ELSE},
- {"enum", KW_ENUM}, {"extern", KW_EXTERN},
- {"float", KW_FLOAT}, {"for", KW_FOR},
- {"goto", KW_GOTO}, {"if", KW_IF},
- {"int", KW_INT}, {"long", KW_LONG},
- {"register", KW_REGISTER}, {"return", KW_RETURN},
- {"short", KW_SHORT}, {"signed", KW_SIGNED},
- {"sizeof", KW_SIZEOF}, {"static", KW_STATIC},
- {"struct", KW_STRUCT}, {"switch", KW_SWITCH},
- {"typedef", KW_TYPEDEF}, {"union", KW_UNION},
- {"unsigned", KW_UNSIGNED}, {"void", KW_VOID},
- {"volatile", KW_VOLATILE}, {"while", KW_WHILE},
- {0, 0}
- };
- int i = 0, t;
-
- while ((t = wordlist[i].t) != 0)
- if (strcmp (wordlist[i++].w, word) == 0)
- return t;
-
- return 0;
- }
-
- int
- gettoken (void)
- {
- int c, c1, c2, i;
- char *p;
-
- lex_token_buffer[0] = '\0';
-
- c = lexgetc ();
-
- if (c == EOF)
- return EOF;
-
- switch (c)
- {
- case '\n':
- lex_lineno++;
-
- case ' ':
- case '\011': /* horizontal tab */
- case '\013': /* vertical tab */
- case '\014': /* form feed */
- case '\015': /* carriage return */
- if (lex_return_white_spaces == 1)
- return c;
- else
- return (gettoken ());
- break;
-
- case '#':
- if (lex_return_directives)
- {
- p = lex_token_buffer;
-
- *p++ = c;
-
- while ((c1 = lexgetc ()) != EOF && c1 != '\n')
- {
- if (p >= lex_token_buffer + maxtoken)
- p = extend_token_buffer (p);
- *p++ = c1;
- }
- *p++ = c1;
- *p = 0;
- return DIRECTIVE;
- }
- else
- return '#';
-
- case 'A': case 'B': case 'C': case 'D': case 'E':
- case 'F': case 'G': case 'H': case 'I': case 'J':
- case 'K': case 'L': case 'M': case 'N': case 'O':
- case 'P': case 'Q': case 'R': case 'S': case 'T':
- case 'U': case 'V': case 'W': case 'X': case 'Y':
- case 'Z':
- case 'a': case 'b': case 'c': case 'd': case 'e':
- case 'f': case 'g': case 'h': case 'i': case 'j':
- case 'k': case 'l': case 'm': case 'n': case 'o':
- case 'p': case 'q': case 'r': case 's': case 't':
- case 'u': case 'v': case 'w': case 'x': case 'y':
- case 'z':
- case '_':
- p = lex_token_buffer;
- while (isalnum (c) || c == '_')
- {
- if (p >= lex_token_buffer + maxtoken)
- p = extend_token_buffer (p);
-
- *p++ = c;
- c = lexgetc ();
- }
- lexungetc (c);
-
- *p = 0;
-
- if ((i = is_reserved_word (lex_token_buffer)) != 0)
- return i;
- else
- return IDENTIFIER;
-
- case '0':
- case '1': case '2': case '3':
- case '4': case '5': case '6':
- case '7': case '8': case '9':
- {
- int firstch = c, start = 0;
- p = lex_token_buffer;
- while (isdigit (c))
- {
- if (p >= lex_token_buffer + maxtoken)
- p = extend_token_buffer (p);
-
- *p++ = c;
- c = lexgetc ();
-
- if (!start && firstch == '0')
- if (c == 'x')
- {
- *p++ = c;
- while (isxdigit (c1 = lexgetc ()))
- {
- if (p >= lex_token_buffer + maxtoken)
- p = extend_token_buffer (p);
- *p++ = c1;
- }
- *p = '\0';
- lexungetc (c1);
- return NUMBER;
- }
- start = 1;
- }
- lexungetc (c);
-
- *p = '\0';
-
- return NUMBER;
- }
-
- case '+':
- if ((c1 = lexgetc ()) == '+')
- return TK_INCREMENT;
- else if (c1 == '=')
- return TK_ADD_ASSIGN;
- else
- {
- lexungetc (c1);
- return '+';
- }
-
- case '-':
- if ((c1 = lexgetc ()) == '-')
- return TK_DECREMENT;
- else if (c1 == '=')
- return TK_SUB_ASSIGN;
- else if (c1 == '>')
- return TK_PTR_OP;
- else
- {
- lexungetc (c1);
- return '-';
- }
-
- case '*':
- if ((c1 = lexgetc ()) == '=')
- return TK_MUL_ASSIGN;
- else
- {
- lexungetc (c1);
- return '*';
- }
-
- case '/':
- if ((c1 = lexgetc ()) == '=')
- return TK_DIV_ASSIGN;
- else
- {
- if (c1 == '*')
- {
- p = lex_token_buffer;
- *p++ = '/';
- *p++ = '*';
- while ((c1 = lexgetc ()) != EOF)
- {
- if (p >= lex_token_buffer + maxtoken)
- p = extend_token_buffer (p);
-
- *p++ = c1;
-
- if (c1 == '*')
- if ((c2 = lexgetc ()) == '/')
- {
- *p++ = c2;
- *p = '\0';
- return COMMENT;
- }
- else
- *p++ = c2;
- }
- *p = '\0';
- }
- lexungetc (c1);
- return '/';
- }
-
- case '%':
- if ((c1 = lexgetc ()) == '=')
- return TK_MOD_ASSIGN;
- else
- {
- lexungetc (c1);
- return '%';
- }
-
- case '!':
- if ((c1 = lexgetc ()) == '=')
- return TK_NE_OP;
- else
- {
- lexungetc (c1);
- return '!';
- }
-
- case '=':
- if ((c1 = lexgetc ()) == '=')
- return TK_EQ_OP;
- else
- {
- lexungetc (c1);
- return '=';
- }
-
- case '&':
- if ((c1 = lexgetc ()) == '=')
- return TK_AND_ASSIGN;
- else if (c1 == '&')
- return TK_AND_OP;
- else
- {
- lexungetc (c1);
- return '&';
- }
-
- case '|':
- if ((c1 = lexgetc ()) == '=')
- return TK_OR_ASSIGN;
- else if (c1 == '|')
- return TK_OR_OP;
- else
- {
- lexungetc (c1);
- return '|';
- }
-
- case '^':
- if ((c1 = lexgetc ()) == '=')
- return TK_XOR_ASSIGN;
- else
- {
- lexungetc (c1);
- return '^';
- }
-
- case '>':
- if ((c1 = lexgetc ()) == '=')
- return TK_GE_OP;
- else if (c1 == '>')
- if ((c2 = lexgetc ()) == '=')
- return TK_LEFT_ASSIGN;
- else
- {
- lexungetc (c2);
- return TK_LEFT_OP;
- }
- else
- {
- lexungetc (c1);
- return '>';
- }
-
- case '<':
- if ((c1 = lexgetc ()) == '=')
- return TK_LE_OP;
- else if (c1 == '<')
- if ((c2 = lexgetc ()) == '=')
- return TK_RIGHT_ASSIGN;
- else
- {
- lexungetc (c2);
- return TK_RIGHT_OP;
- }
- else
- {
- lexungetc (c1);
- return '<';
- }
-
- case '.':
- if ((c1 = lexgetc ()) == '.')
- if ((c2 = lexgetc ()) == '.')
- return TK_ELLIPSIS;
- else
- {
- lexungetc (c2);
- lexungetc (c1);
- return '.';
- }
- else
- {
- lexungetc (c1);
- return '.';
- }
-
- case '"':
- p = lex_token_buffer;
- *p++ = '"';
- while ((c1 = lexgetc ()) != EOF && c1 != '"')
- {
- if (p >= lex_token_buffer + maxtoken)
- p = extend_token_buffer (p);
- *p++ = c1;
-
- if (c1 == '\\')
- *p++ = lexgetc ();
- }
-
- *p++ = '"';
-
- *p = 0;
-
- return STRING;
-
- case '\'':
- p = lex_token_buffer;
- *p++ = '\'';
- while ((c1 = lexgetc ()) != EOF && c1 != '\'')
- {
- if (p >= lex_token_buffer + maxtoken)
- p = extend_token_buffer (p);
- *p++ = c1;
-
- if (c1 == '\\')
- *p++ = lexgetc ();
- }
-
- *p++ = '\'';
-
- *p = 0;
-
- return CHARACTER;
-
- default:
- return c;
- }
- }
-
- /* lex.c ends here */
-